library(GSVA)
library(openxlsx)
library(tidyverse)
library(Biobase)
library(ComplexHeatmap)
library(circlize)
library(hypeR)

PATH <- file.path(Sys.getenv("MLAB"), "projects/brcameta/exosome/4t1_brca_brain_mets/")
TCGAPATH <- file.path(Sys.getenv("CBM"),"TCGA-GDC/")
METABRICPATH <- file.path(Sys.getenv("CBM"), "METABRIC/")
DPATH <- file.path(Sys.getenv("CBM"),"otherStudies/RNAseq/2022-06-03-DenisExosomeBrCaBrainMets/")

do_save <- TRUE

Loading Data

tcga_brca <- readRDS(file.path(TCGAPATH,"/esets_filtered/TCGA-BRCA_2020-03-22_DESeq2_log_filtered_eset.rds"))

metabric <- readRDS(file.path(METABRICPATH, "ESets/metabric_GE_ESet.rds"))
metabric_impute <- readRDS(file.path(METABRICPATH, "ESets/metabric_GE_ESet_impute.rds"))
#Signatures

## Obesity signature from Fuentes-Mattei et al., JNCI 2014, Table S3.
obDiff <- read.xlsx( file.path(PATH,"data/JNCI2014_SupplementaryTable3_ObeseSignatureBrCa.xlsx") ) 
colnames(obDiff)[match(c("Gene.Title.(Definition)","Log.ratio.(Log10)","p-value*"),colnames(obDiff))] <-
    c("Gene.Title","Log10.ratio","pvalue")
obSig <- list(
    ob.up=dplyr::filter(obDiff,Log10.ratio>0) %>% select(Gene.Symbol) %>% drop_na() %>% distinct() %>% pull(),
    ob.dn=dplyr::filter(obDiff,Log10.ratio<0) %>% select(Gene.Symbol) %>% drop_na() %>% distinct() %>% pull())

## Signatures from RNA-Seq experiments
all_sigs <- readRDS(file.path(PATH,"data/signatures_symbol.rds"))
all_sigs <- all_sigs[lapply(all_sigs, length) > 1]

all_sigs_human <- lapply(all_sigs, babelgene::orthologs, species="mouse", human=FALSE)
all_sigs_human <- lapply(all_sigs_human, function (x) x %>% dplyr::pull(human_symbol))

all_sigs_human <- c(all_sigs_human, obSig)

GSVA

# GSVA
if (do_save) {
  #subtype_filter <- is.na(tcga_brca$subtype_selected)
  #tcga_brca <- tcga_brca[,!subtype_filter]
  #tcga_brca <- tcga_brca[,tcga_brca$subtype_selected == "BRCA.LumA"]
  #metabric <- metabric[,metabric$Pam50_SUBTYPE == "LumA"]
  tcga_gsva <- gsva(tcga_brca, all_sigs_human, mx.diff=TRUE, verbose=FALSE)
  metabric_gsva <- gsva(metabric, all_sigs_human, mx.diff=TRUE, verbose=FALSE)
  saveRDS(metabric_gsva, file.path(PATH, "data/metabric_gsva_res_obs.rds"))
  saveRDS(tcga_gsva, file.path(PATH, "data/tcga_gsva_res_obs.rds"))  
} else {
  tcga_gsva <- readRDS(file.path(PATH, "data/tcga_gsva_res_obs.rds"))
  metabric_gsva <- readRDS(file.path(PATH, "data/metabric_gsva_res_obs.rds"))
}
# Subtype filtering
metabric_gsva <- metabric_gsva[,metabric_gsva$Pam50_SUBTYPE != "NC"]
subtype_filter <- is.na(tcga_gsva$subtype_selected)
tcga_gsva <- tcga_gsva[,!subtype_filter]

pData(tcga_gsva) <- pData(tcga_gsva) %>% mutate(pam50subtype = str_split(subtype_selected, pattern="BRCA.", simplify=TRUE)[,2])
tcga_gsva$ir_c <- t(exprs(tcga_gsva["ir_c_up",]) - exprs(tcga_gsva["ir_c_down"]))
tcga_gsva$is_c <- t(exprs(tcga_gsva["is_c_up",])-exprs(tcga_gsva["is_c_down",]))
tcga_gsva$ir_is <- t(exprs(tcga_gsva["ir_is_up",])-exprs(tcga_gsva["ir_is_down",]))
tcga_gsva$ob <- t(exprs(tcga_gsva["ob.up",])-exprs(tcga_gsva["ob.dn",]))

metabric_gsva$ir_c <- t(exprs(metabric_gsva["ir_c_up",]) - exprs(metabric_gsva["ir_c_down"]))
metabric_gsva$is_c <- t(exprs(metabric_gsva["is_c_up",])-exprs(metabric_gsva["is_c_down",]))
metabric_gsva$ir_is <- t(exprs(metabric_gsva["ir_is_up",])-exprs(metabric_gsva["ir_is_down",]))
metabric_gsva$ob <- t(exprs(metabric_gsva["ob.up",])-exprs(metabric_gsva["ob.dn",]))

Boxplots

col.pam50 <- c(LumA="pink",LumB="red",Her2="yellow",Basal="darkgray",Normal="white")
for (gset in c("ir_c", "is_c", "ir_is", "ob")) {
    par(mfrow=c(1,2))
    boxplot(pData(tcga_gsva)[,gset] ~ tcga_gsva$pam50subtype, col=col.pam50, las=2,
            xlab="pam50",ylab=gset,main="TCGA" )
    boxplot(pData(metabric_gsva)[,gset] ~ metabric_gsva$Pam50_SUBTYPE, col=col.pam50, las=2,
            xlab="pam50",ylab=gset,main="METABRIC" )
}

Correlation with Obesity

TCGA Heatmap

sample_col <- hcl.colors(n=nlevels(tcga_gsva$pam50subtype %>% factor))
names(sample_col) <- levels(tcga_gsva$pam50subtype %>% factor)
ha.genes <- HeatmapAnnotation(sample_type=tcga_gsva$pam50subtype,
                              col=list(sample_type=sample_col))

png(file=file.path(PATH, "results/GSVA_tcga_obs_Heatmap.png"))
gsva_heatmap <- Heatmap(t(scale(t(exprs(tcga_gsva)))),
        name="Enrichment Score", 
        col=colorRamp2(c(-3, 0, 3), c("#072448", "white", "#ff6150")),
        top_annotation=ha.genes, 
        cluster_rows=TRUE,
        cluster_columns=TRUE,
        clustering_distance_rows="euclidean",
        clustering_method_rows="ward.D",    
        clustering_distance_columns="euclidean",
        clustering_method_columns="ward.D", 
        show_parent_dend_line=TRUE,
        column_title = "samples",
        column_dend_height = unit(5, "mm"),
        row_title="Genesets",
        row_dend_width = unit(5, "mm"),
        show_column_names=FALSE,
        show_row_names=TRUE)
draw(gsva_heatmap)
dev.off()
## png 
##   2
gsva_heatmap

TCGA Corplot

library(ggcorrplot)
gs_cor <- cor(t(exprs(tcga_gsva)))

ggcorrplot(gs_cor, hc.order = TRUE, type = "lower",
   outline.col = "white",
   ggtheme = ggplot2::theme_gray,
   lab=TRUE,
   colors = c("#6D9EC1", "white", "#E46726"))

Metabric Heatmap

sample_col <- hcl.colors(n=nlevels(metabric_gsva$Pam50_SUBTYPE %>% factor))
names(sample_col) <- levels(metabric_gsva$Pam50_SUBTYPE %>% factor)
ha.genes <- HeatmapAnnotation(sample_type=metabric_gsva$Pam50_SUBTYPE,
                              col=list(sample_type=sample_col))

png(file=file.path(PATH, "results/GSVA_metabric_obs_Heatmap.png"))
gsva_heatmap <- Heatmap(t(scale(t(exprs(metabric_gsva)))),
        name="Enrichment Score", 
        col=colorRamp2(c(-3, 0, 3), c("#072448", "white", "#ff6150")),
        top_annotation=ha.genes, 
        cluster_rows=TRUE,
        cluster_columns=TRUE,
        clustering_distance_rows="euclidean",
        clustering_method_rows="ward.D",    
        clustering_distance_columns="euclidean",
        clustering_method_columns="ward.D", 
        show_parent_dend_line=TRUE,
        column_title = "samples",
        column_dend_height = unit(5, "mm"),
        row_title="Genesets",
        row_dend_width = unit(5, "mm"),
        show_column_names=FALSE,
        show_row_names=TRUE)
draw(gsva_heatmap)
dev.off()
## png 
##   2
gsva_heatmap

Metabric Corplot

gs_cor <- cor(t(exprs(metabric_gsva)))

ggcorrplot(gs_cor, hc.order = TRUE, type = "lower",
   outline.col = "white",
   ggtheme = ggplot2::theme_gray,
   lab=TRUE,
   colors = c("#6D9EC1", "white", "#E46726"))

KS-based enrichment

## Add gene symbols to DESeq tables
dat <- readRDS(file.path(PATH, "data/4T1_mets_sExp.rds"))
deseq_list <- readRDS(file.path(PATH,"data/deseq_list.rds"))
hallmark_genesets <- msigdb_download(species="Mus musculus", category="H")
hallmark_genesets$OBESITY <- obSig

deseq_list <- lapply(deseq_list, function(Z) {
  as.data.frame(Z) %>% 
    tibble::rownames_to_column(var="ensembl_id") %>%
    dplyr::mutate(geneID=unlist(rowData(dat)[ensembl_id,"mgi_symbol"]))
  })
## rank by up-regulation
rank_signatures_up <- lapply(
  deseq_list, function(sig) sig %>% 
    dplyr::filter(geneID!="") %>%
    dplyr::arrange(desc(log2FoldChange)) %>%
    dplyr::select(geneID,log2FoldChange) %>%
    tibble::deframe())
names(rank_signatures_up) <- paste(names(rank_signatures_up),"up",sep="_")

## rank by down-regulation
rank_signatures_dn <- lapply(
  deseq_list,function(sig) sig %>% 
    dplyr::filter(geneID!="") %>%
    dplyr::arrange(log2FoldChange) %>%
    dplyr::select(geneID,log2FoldChange) %>%
    tibble::deframe())
names(rank_signatures_dn) <- paste(names(rank_signatures_dn),"dn",sep="_")

rank_signatures <- c(rank_signatures_up,rank_signatures_dn)
rank_signatures <- rank_signatures[order(names(rank_signatures),decreasing = TRUE)]

Hallmarks + Pamm

#all_genesets <- c(hallmark_genesets, pamm_genelists)
max_fdr <- 0.05
ks_hall <- hypeR::hypeR(signature=rank_signatures,genesets=hallmark_genesets,test="ks",fdr=max_fdr,plotting=TRUE)
hyp_dots(ks_hall,merge=TRUE,fdr=max_fdr/5,top=25) + ggtitle(paste("FDR ≤", max_fdr/5))

#hypeR::rctbl_build(ks_hall, show_hmaps=FALSE)

IS C UP

print(ks_hall$data$is_c_up$plots[ks_hall$data$is_c_up$data$fdr<=0.01])
## $HALLMARK_INTERFERON_ALPHA_RESPONSE

## 
## $HALLMARK_INTERFERON_GAMMA_RESPONSE

## 
## $HALLMARK_KRAS_SIGNALING_UP

## 
## $HALLMARK_E2F_TARGETS

## 
## $HALLMARK_OXIDATIVE_PHOSPHORYLATION

## 
## $HALLMARK_G2M_CHECKPOINT

IS C Down

print(ks_hall$data$is_c_dn$plots[ks_hall$data$is_c_dn$data$fdr<=0.01])
## $HALLMARK_MYC_TARGETS_V1

## 
## $HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION

## 
## $HALLMARK_PROTEIN_SECRETION

## 
## $HALLMARK_MTORC1_SIGNALING

## 
## $HALLMARK_UV_RESPONSE_DN

## 
## $HALLMARK_OXIDATIVE_PHOSPHORYLATION

## 
## $HALLMARK_DNA_REPAIR

## 
## $HALLMARK_TGF_BETA_SIGNALING

IR IS Up

print(ks_hall$data$ir_is_up$plots[ks_hall$data$ir_is_up$data$fdr<=0.01])
## $HALLMARK_HYPOXIA

## 
## $HALLMARK_MYC_TARGETS_V1

## 
## $HALLMARK_MITOTIC_SPINDLE

## 
## $HALLMARK_UV_RESPONSE_DN

## 
## $HALLMARK_TNFA_SIGNALING_VIA_NFKB

## 
## $HALLMARK_GLYCOLYSIS

## 
## $HALLMARK_OXIDATIVE_PHOSPHORYLATION

IR IS Down

print(ks_hall$data$ir_is_dn$plots[ks_hall$data$ir_is_dn$data$fdr<=0.01])
## $HALLMARK_INTERFERON_ALPHA_RESPONSE

## 
## $HALLMARK_OXIDATIVE_PHOSPHORYLATION

## 
## $HALLMARK_INTERFERON_GAMMA_RESPONSE

## 
## $HALLMARK_MYC_TARGETS_V1

## 
## $HALLMARK_ALLOGRAFT_REJECTION

IR C Up

print(ks_hall$data$ir_c_up$plots[ks_hall$data$ir_c_up$data$fdr<=0.01])
## $HALLMARK_G2M_CHECKPOINT

## 
## $HALLMARK_E2F_TARGETS

## 
## $HALLMARK_HYPOXIA

## 
## $HALLMARK_INFLAMMATORY_RESPONSE

## 
## $HALLMARK_KRAS_SIGNALING_UP

## 
## $HALLMARK_MITOTIC_SPINDLE

## 
## $HALLMARK_MYC_TARGETS_V1

## 
## $HALLMARK_SPERMATOGENESIS

IR C Down

print(ks_hall$data$ir_c_dn$plots[ks_hall$data$ir_c_dn$data$fdr<=0.01])
## $HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION

## 
## $HALLMARK_MYC_TARGETS_V1

## 
## $HALLMARK_PROTEIN_SECRETION

## 
## $HALLMARK_ALLOGRAFT_REJECTION

## 
## $HALLMARK_DNA_REPAIR

## 
## $HALLMARK_OXIDATIVE_PHOSPHORYLATION

## 
## $HALLMARK_IL2_STAT5_SIGNALING